# devtools::install_local("geneRefineR/", force = T)
# library("echolocatoR")
source("echolocatoR/R/functions.R")
library(readxl)
library(DT)
library(data.table)
library(dplyr)
library(ggplot2)
library(plotly)
library(cowplot)
library(ggrepel)
library(curl)
library(biomaRt)
# Ensembl LD API
library(httr)
library(jsonlite)
library(xml2)
library(gaston)
library(RCurl)
# *** susieR ****
# library(knitrBootstrap) #install_github('jimhester/knitrBootstrap')
library(susieR) # devtools::install_github("stephenslab/susieR")
sessionInfo()## R version 3.5.1 (2018-07-02)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS release 6.9 (Final)
##
## Matrix products: default
## BLAS/LAPACK: /hpc/packages/minerva-common/intel/parallel_studio_xe_2018/compilers_and_libraries_2018.1.163/linux/mkl/lib/intel64_lin/libmkl_gf_lp64.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] susieR_0.6.2.0411 tidyr_0.8.1 RCurl_1.95-4.11
## [4] bitops_1.0-6 gaston_1.5.4 RcppParallel_4.4.2
## [7] Rcpp_1.0.0 xml2_1.2.0 jsonlite_1.5
## [10] httr_1.3.1 biomaRt_2.38.0 curl_3.2
## [13] ggrepel_0.8.0 cowplot_0.9.3 plotly_4.7.1
## [16] ggplot2_3.1.0 dplyr_0.7.6 data.table_1.11.8
## [19] DT_0.4 readxl_1.1.0
##
## loaded via a namespace (and not attached):
## [1] lattice_0.20-35 prettyunits_1.0.2 assertthat_0.2.0
## [4] rprojroot_1.3-2 digest_0.6.18 R6_2.4.0
## [7] cellranger_1.1.0 plyr_1.8.4 backports_1.1.2
## [10] stats4_3.5.1 RSQLite_2.1.1 evaluate_0.11
## [13] pillar_1.3.1 rlang_0.3.1 progress_1.2.0
## [16] lazyeval_0.2.1 blob_1.1.1 S4Vectors_0.20.1
## [19] Matrix_1.2-14 rmarkdown_1.10 stringr_1.4.0
## [22] htmlwidgets_1.2 bit_1.1-14 munsell_0.5.0
## [25] compiler_3.5.1 pkgconfig_2.0.2 BiocGenerics_0.28.0
## [28] htmltools_0.3.6 tidyselect_0.2.4 expm_0.999-2
## [31] tibble_2.0.1 IRanges_2.16.0 XML_3.98-1.12
## [34] viridisLite_0.3.0 crayon_1.3.4 withr_2.1.2
## [37] grid_3.5.1 gtable_0.2.0 DBI_1.0.0
## [40] magrittr_1.5 scales_1.0.0 stringi_1.3.1
## [43] bindrcpp_0.2.2 tools_3.5.1 bit64_0.9-7
## [46] Biobase_2.42.0 glue_1.3.0 purrr_0.2.5
## [49] hms_0.4.2 parallel_3.5.1 yaml_2.1.19
## [52] AnnotationDbi_1.44.0 colorspace_1.3-2 memoise_1.1.0
## [55] knitr_1.20 bindr_0.1.1
print(paste("susieR ", packageVersion("susieR"))) ## [1] "susieR 0.6.2.411"
# MINERVA PATHS TO SUMMARY STATISTICS DATASETS:
# root <- "../../.."
root <- "~/../../../sc/orga/projects"
Data_dirs = list(
# ++++++++ GWAS SUMMARY STATS ++++++++ #
# Nall et al. (2019) w/ 23andMe
"Nalls_23andMe" = list(type="Parkinson's GWAS",
topSS="Data/Parkinsons/Nalls_23andMe/Nalls2019_TableS2.xlsx",
# fullSS=file.path(root,"pd-omics/data/nallsEtAl2019/23andme/PD_all_post30APRIL2015_5.2_extended.txt")),
fullSS=file.path(root,"pd-omics/data/nallsEtAl2019/combined_meta/nallsEtAl2019_allSamples_allVariants.mod.txt"),
reference="https://www.biorxiv.org/content/10.1101/388165v3"),
## IGAP
"Lambert_2013" = list(type="Alzheimer's GWAS",
topSS="Data/Alzheimers/Lambert_2013/Lambert_2019_AD_GWAS.xlsx",
fullSS=file.path(root,"ad-omics/data/AD_GWAS/Lambert_2013/IGAP_stage_1.1000G.phase3.20130502.tsv"),
reference="https://www.nature.com/articles/ng.2802"),
## Marioni et al. (2018)
"Marioni_2018" = list(type="Alzheimer's GWAS",
topSS="Data/Alzheimers/Marioni_2018/Marioni2018_supplementary_tables.xlsm",
fullSS=file.path(root,"ad-omics/data/AD_GWAS/Marioni_2018/Marioni2018.4_UK_Biobank_IGAP_17May2018.1000G.phase3.20130502.tsv"),
reference="https://www.nature.com/articles/s41398-018-0150-6"),
## Jansen et al. (2018)
"Posthuma_2018" = list(type="Alzheimer's GWAS",
topSS="Data/Alzheimers/Posthuma_2018/Posthuma2018_suppTables.xlsx",
fullSS=file.path(root,"ad-omics/data/AD_GWAS/Posthuma_2018/phase3.beta.se.hrc.txt"),
reference="https://www.nature.com/articles/s41588-018-0311-9"),
## Kunkle et al. (2018) Alzheimer's GWAS
"Kunkle_2019" = list(type="Alzheimer's GWAS",
topSS="Data/Alzheimers/Kunkle_2019/Kunkle2019_supplementary_tables.xlsx",
fullSS=file.path(root,"ad-omics/data/AD_GWAS/Kunkle_2019/Kunkle_etal_Stage1_results.txt"),
# fullSS_stage2=file.path(root,"ad-omics/data/AD_GWAS/Kunkle_etal_Stage2_results.txt"),
reference="https://www.nature.com/articles/s41588-019-0358-2"),
# ++++++++ eQTL SUMMARY STATS ++++++++ #
## MESA eQTLs: African Americans
"MESA_AFA" = list(type="eQTL",
topSS="Data/eQTL/MESA/AFA_eQTL_PTK2B.txt",
fullSS=file.path(root,"ad-omics/data/mesa/AFA_cis_eqtl_summary_statistics.txt"),
reference="https://www.nhlbi.nih.gov/science/multi-ethnic-study-atherosclerosis-mesa"),
## MESA eQTLs: Caucasians
"MESA_CAU" = list(type="eQTL",
topSS="Data/eQTL/MESA/CAU_eQTL_PTK2B.txt",
fullSS=file.path(root,"ad-omics/data/mesa/CAU_cis_eqtl_summary_statistics.txt"),
reference="https://www.nhlbi.nih.gov/science/multi-ethnic-study-atherosclerosis-mesa"),
## MESA eQTLs: Hispanics
"MESA_HIS" = list(type="eQTL",
topSS="Data/eQTL/MESA/HIS_eQTL_PTK2B.txt",
fullSS=file.path(root,"ad-omics/data/mesa/HIS_cis_eqtl_summary_statistics.txt"),
reference="https://www.nhlbi.nih.gov/science/multi-ethnic-study-atherosclerosis-mesa")
)
Data_dirs_table <- Data_dirs_to_table(Data_dirs, writeCSV = "Results/directories_table.csv")
# Direct to local data files if not working on Minerva
if(getwd()=="/Users/schilder/Desktop/Fine_Mapping"){
vcf_folder = F # Use internet if I'm working from my laptop
} else{
vcf_folder = F#"../1000_Genomes_VCFs/Phase1" # Use previously downloaded files if working on Minerva node
}
force_new_subset = F
allResults <- list()“Rare coding variant burden analysis:
We performed kernel-based burden tests on the 113 genes in our PD GWAS loci that contained two or more rare coding variants (MAF< 5% or MAF < 1%). After Bonferroni correction for 113 genes, we identified 7 significant putatively causal genes: - LRRK2, GBA, CATSPER3 (rs11950533/C5orf24 locus) - LAMB2 (rs12497850/IP6K2 locus) - LOC442028 (rs2042477/KCNIP3 locus) - NFKB2 (rs10748818/GBF1 locus), and SCARB2 (rs6825004 locus).”
– from Nalls et al. (2019)
dataset_name <- "Nalls_23andMe"
top_SNPs <- import_topSNPs(
file_path = Data_dirs[[dataset_name]]$topSS,
chrom_col = "CHR", position_col = "BP", snp_col="SNP",
pval_col="P, all studies", effect_col="Beta, all studies", gene_col="Nearest Gene",
caption= "Nalls et al. (2018) w/ 23andMe PD GWAS Summary Stats")
gene_list <- c("LRRK2","SNCA","VPS13C","C5orf24","IP6K2","KCNIP3","GBF1","SCARB2")#, top_SNPs$Gene) %>% unique()
allResults[[dataset_name]] <- finemap_geneList(top_SNPs,
geneList=gene_list,
file_path= Data_dirs[[dataset_name]]$fullSS,#"./LRRK2_EUR_subset.txt",#
chrom_col = "CHR", position_col = "POS", snp_col = "RSID",
pval_col = "p", effect_col = "beta", stderr_col = "se",
vcf_folder = vcf_folder, superpopulation = "EUR", force_new_subset = force_new_subset) Extracting SNPs flanking lead SNP… —Min snp position: 40234202 — —Max snp position: 41234202 — Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/pd-omics/data/nallsEtAl2019/combined_meta/LRRK2_combined_meta_subset.txt …
Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/LRRK2_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.
Fine mapping with SusieR… [1] “objective:-4819.31771667441” [1] “objective:-4817.53694272212” [1] “objective:-4817.5346354578” [1] “objective:-4817.53463247054” [1] “objective:-4817.5346324668” [1] “objective:-4817.53463246679”
Credible Set:
****** 5 SNPs included in Credible Set ******
## Warning: Removed 1 rows containing missing values (geom_hline).
Extracting SNPs flanking lead SNP… —Min snp position: 90126111 — —Max snp position: 91126111 — Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/pd-omics/data/nallsEtAl2019/combined_meta/SNCA_combined_meta_subset.txt …
Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/SNCA_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.
Fine mapping with SusieR… [1] “objective:-3916.13477060794” [1] “objective:-3885.12408928578” [1] “objective:-3885.10414431053” [1] “objective:-3885.10413226354” [1] “objective:-3885.10413225772” [1] “objective:-3885.10413225772”
Credible Set:
****** 1 SNPs included in Credible Set ******
## Warning: Removed 1 rows containing missing values (geom_hline).
Extracting SNPs flanking lead SNP… —Min snp position: 61497385 — —Max snp position: 62497385 — Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/pd-omics/data/nallsEtAl2019/combined_meta/VPS13C_combined_meta_subset.txt …
Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/VPS13C_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.
Fine mapping with SusieR… [1] “objective:-3963.78730870254” [1] “objective:-3963.32612301913” [1] “objective:-3963.3257570866” [1] “objective:-3963.32575679555”
Credible Set:
****** 8 SNPs included in Credible Set ******
## Warning: Removed 1 rows containing missing values (geom_hline).
Extracting SNPs flanking lead SNP… —Min snp position: 133699105 — —Max snp position: 134699105 — Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/pd-omics/data/nallsEtAl2019/combined_meta/C5orf24_combined_meta_subset.txt …
Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/C5orf24_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.
Fine mapping with SusieR… [1] “objective:-2730.39327779333” [1] “objective:-2730.27173484353” [1] “objective:-2730.27159727042” [1] “objective:-2730.27159711459”
Credible Set:
****** 37 SNPs included in Credible Set ******
## Warning: Removed 1 rows containing missing values (geom_hline).
Extracting SNPs flanking lead SNP… —Min snp position: 48248989 — —Max snp position: 49248989 — Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/pd-omics/data/nallsEtAl2019/combined_meta/IP6K2_combined_meta_subset.txt …
Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/IP6K2_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.
Fine mapping with SusieR… [1] “objective:-1732.44890838796” [1] “objective:-1732.18758589222” [1] “objective:-1732.18663923239” [1] “objective:-1732.18663574514”
Credible Set:
— ERROR — ****** Could NOT identify credible set. Default to SNPs with the top 5 PIPs ******
## Warning: Removed 1 rows containing missing values (geom_hline).
Extracting SNPs flanking lead SNP… —Min snp position: 95500943 — —Max snp position: 96500943 — Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/pd-omics/data/nallsEtAl2019/combined_meta/KCNIP3_combined_meta_subset.txt …
Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/KCNIP3_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.
Fine mapping with SusieR… [1] “objective:-1045.5376558212” [1] “objective:-1045.2548831746” [1] “objective:-1045.25378394202” [1] “objective:-1045.25377968046” [1] “objective:-1045.25377966438” [1] “objective:-1045.25377966432”
Credible Set:
****** 58 SNPs included in Credible Set ******
## Warning: Removed 1 rows containing missing values (geom_hline).
Extracting SNPs flanking lead SNP… —Min snp position: 103515279 — —Max snp position: 104515279 — Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/pd-omics/data/nallsEtAl2019/combined_meta/GBF1_combined_meta_subset.txt …
Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/GBF1_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.
Fine mapping with SusieR… [1] “objective:-2327.33554736521” [1] “objective:-2327.14720605155” [1] “objective:-2327.14644032417” [1] “objective:-2327.14643727145”
Credible Set:
— ERROR — ****** Could NOT identify credible set. Default to SNPs with the top 5 PIPs ******
## Warning: Removed 1 rows containing missing values (geom_hline).
Extracting SNPs flanking lead SNP… —Min snp position: 76610365 — —Max snp position: 77610365 — Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/pd-omics/data/nallsEtAl2019/combined_meta/SCARB2_combined_meta_subset.txt …
Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/SCARB2_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.
Fine mapping with SusieR… [1] “objective:-4438.64199045971” [1] “objective:-4438.05391400102” [1] “objective:-4438.05352828151” [1] “objective:-4438.05352803242”
Credible Set:
****** 4 SNPs included in Credible Set ******
## Warning: Removed 1 rows containing missing values (geom_hline).
## Warning: Removed 207 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 207 rows containing missing values (geom_segment).
## Warning: Removed 4 rows containing missing values (geom_text_repel).
#c("LRRK2","SNCA","VPS13C","GCH1"),
# "GBA","CATSPER3", "LAMB2", "LOC442028", "NFKB2", "SCARB2", "GRN"dataset_name <- "Posthuma_2018"
top_SNPs <- import_topSNPs(
file_path = Data_dirs[[dataset_name]]$topSS,
sheet = "Table S8",
chrom_col = "Chr", position_col = "bp", snp_col="SNP",
pval_col="P", effect_col="Zscore", gene_col="nearestGene",
caption= "Posthuma et al. (2018) AD GWAS Summary Stats")
allResults[[dataset_name]] <- finemap_geneList(top_SNPs, geneList=c("PTK2B"),
file_path=Data_dirs[[dataset_name]]$fullSS,#"Data/Alzheimers/Posthuma_2018/phase3.beta.se.hrc.txt",#
effect_col = "BETA", stderr_col = "SE", position_col = "BP",
snp_col = "SNP", pval_col = "P",
vcf_folder = vcf_folder, superpopulation = "EUR",
minPos=NULL, maxPos=27440000, force_new_subset = force_new_subset)Extracting SNPs flanking lead SNP… —Min snp position: 26695121 — —Max snp position: 27440000 — Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/ad-omics/data/AD_GWAS/Posthuma_2018/PTK2B_Posthuma_2018_subset.txt …
Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.
Fine mapping with SusieR… [1] “objective:-2946.7022383842” [1] “objective:-2946.45848339141” [1] “objective:-2946.45830481344” [1] “objective:-2946.45830468346”
Credible Set:
****** 6 SNPs included in Credible Set ******
## Warning: Removed 1 rows containing missing values (geom_hline).
dataset_name <- "Marioni_2018"
top_SNPs <- import_topSNPs(
file_path = Data_dirs[[dataset_name]]$topSS,
sheet = "Table S9",
chrom_col = "topSNP_chr", position_col = "topSNP_bp", snp_col="topSNP",
pval_col="p_GWAS", effect_col="b_GWAS", gene_col="Gene",
caption= "Marioni et al. (2018) AD GWAS Summary Stats")
allResults[[dataset_name]] <- finemap_geneList(top_SNPs, geneList=c("PTK2B"),
file_path=Data_dirs[[dataset_name]]$fullSS,#"Data/Alzheimers/Marioni_2018/Marioni2018.4_UK_Biobank_IGAP_17May2018.1000G.phase3.20130502.tsv", #,
effect_col = "BETA", stderr_col = "SE", position_col = "POS",
snp_col = "SNP",chrom_col = "CHROM", pval_col = "PVAL",
vcf_folder = vcf_folder, superpopulation = "EUR",
minPos=NULL, maxPos=27440000, force_new_subset = force_new_subset)Extracting SNPs flanking lead SNP… —Min snp position: 26688518 — —Max snp position: 27440000 — Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/ad-omics/data/AD_GWAS/Marioni_2018/PTK2B_Marioni_2018_subset.txt …
Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.
Fine mapping with SusieR… [1] “objective:-1966.3994025023” [1] “objective:-1965.70884939607” [1] “objective:-1965.70807253082” [1] “objective:-1965.70807165376”
Credible Set:
****** 5 SNPs included in Credible Set ******
## Warning: Removed 1 rows containing missing values (geom_hline).
## Warning: Removed 354 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 354 rows containing missing values (geom_segment).
## Warning: Removed 5 rows containing missing values (geom_text_repel).
Filtering out the CLU portion of the locus prevents susieR from identifying a credible set.
dataset_name <- "Lambert_2013"
top_SNPs <- import_topSNPs(
file_path = Data_dirs[[dataset_name]]$topSS,
sheet = 1,
chrom_col = "CHR", position_col = "Position", snp_col="SNP",
pval_col="Overall_P", effect_col="Overall_OR", gene_col="Closest gene",
caption= "Lambert (2013) (IGAP): AD GWAS Summary Stats")
# genes <- snps_to_genes(snp_list=top_SNPs$SNP )
allResults[[dataset_name]] <- finemap_geneList(top_SNPs, geneList=c("PTK2B"),
file_path=Data_dirs[[dataset_name]]$fullSS,#"Data/Alzheimers/Lambert_2013/IGAP_stage_1.1000G.phase3.20130502.tsv",#
chrom_col = "CHROM", pval_col = "PVAL", snp_col = "SNP",
effect_col = "BETA", stderr_col = "SE", position_col = "POS",
vcf_folder = vcf_folder, num_causal = 1, superpopulation = "EUR",
minPos=NULL, maxPos=27440000, force_new_subset = force_new_subset) Extracting SNPs flanking lead SNP… —Min snp position: 26695121 — —Max snp position: 27440000 — Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/ad-omics/data/AD_GWAS/Lambert_2013/PTK2B_Lambert_2013_subset.txt …
Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.
Fine mapping with SusieR… [1] “objective:-1875.33189392468” [1] “objective:-1875.12745540191” [1] “objective:-1875.12685678814” [1] “objective:-1875.12685505843”
Credible Set:
— ERROR — ****** Could NOT identify credible set. Default to SNPs with the top 5 PIPs ******
## Warning: Removed 1 rows containing missing values (geom_hline).
Exclude CLU-associated portion of locus by removing all SNPs over position 8:27440000.
The CLU-gene technically starts at 8:27456253, but its associated SNPs are further upstream.
dataset_name <- "Kunkle_2019"
top_SNPs <- import_topSNPs(
file_path = Data_dirs[[dataset_name]]$topSS,
sheet = "Supplementary Table 6",
chrom_col = "Chr", position_col = "Basepair", snp_col="SNP",
pval_col="P", effect_col="Beta", gene_col="LOCUS",
caption= "Kunkle (2019) (IGAP): AD GWAS Summary Stats")
# genes <- snps_to_genes(snp_list=top_SNPs$SNP )
allResults[[dataset_name]] <- finemap_geneList(top_SNPs, geneList=c("PTK2B"),
file_path=Data_dirs[[dataset_name]]$fullSS,#"Data/Alzheimers/Kunkle_2019/Kunkle_etal_Stage1_results.txt",#
chrom_col = "Chromosome", position_col = "Position", pval_col = "Pvalue",
snp_col = "MarkerName", effect_col = "Beta", stderr_col = "SE",
vcf_folder = vcf_folder, num_causal = 1, superpopulation = "EUR",
minPos=NULL, maxPos=27440000, file_sep=" ", force_new_subset = force_new_subset) # Exclude CLU-associated portion of locusExtracting SNPs flanking lead SNP… —Min snp position: 26719987 — —Max snp position: 27440000 — Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/ad-omics/data/AD_GWAS/Kunkle_2019/PTK2B_Kunkle_2019_subset.txt …
Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.
Fine mapping with SusieR… [1] “objective:-2968.55924360798” [1] “objective:-2968.40951319364” [1] “objective:-2968.40927818658” [1] “objective:-2968.40927781”
Credible Set:
****** 5 SNPs included in Credible Set ******
## Warning: Removed 1 rows containing missing values (geom_hline).
dataset_name <- "MESA_AFA"
superpopulation <- "AFA"
allResults[[dataset_name]] <- finemap_eQTL(superpopulation, gene = "PTK2B",
fullSS_path = Data_dirs[[dataset_name]]$fullSS,
force_new_subset = force_new_subset)Subset file already exists. Importing Data/eQTL/MESA/PTK2B_AFR_subset.txt …
## Warning in charToRaw(enc2utf8(text)): argument should be a character vector of length 1
## all but the first element will be ignored
awk -F " " ‘NR==1{print $0}{ if(($7 == 8) && ($12 >= 26695121 && $12 <= 27695121)) { print } }’ Data/eQTL/MESA/PTK2B_AFR_subset.txt > Data/eQTL/MESA/PTK2B_MESA_subset.txt Extraction completed in 0.12 seconds Calculating Standard Error…
Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.
Fine mapping with SusieR… [1] “objective:-4452.41334407999” [1] “objective:-4452.272858439” [1] “objective:-4452.27277053595” [1] “objective:-4452.27277048098”
Credible Set:
****** 2 SNPs included in Credible Set ******
## Warning: Removed 1 rows containing missing values (geom_hline).
dataset_name <- "MESA_CAU"
superpopulation <- "CAU"
allResults[[dataset_name]] <- finemap_eQTL(superpopulation, gene = "PTK2B",
fullSS_path = Data_dirs[[dataset_name]]$fullSS,
force_new_subset = force_new_subset)Subset file already exists. Importing Data/eQTL/MESA/PTK2B_EUR_subset.txt …
## Warning in charToRaw(enc2utf8(text)): argument should be a character vector of length 1
## all but the first element will be ignored
awk -F " " ‘NR==1{print $0}{ if(($7 == 8) && ($12 >= 26695121 && $12 <= 27695121)) { print } }’ Data/eQTL/MESA/PTK2B_EUR_subset.txt > Data/eQTL/MESA/PTK2B_MESA_subset.txt Extraction completed in 0.07 seconds Calculating Standard Error…
Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.
Fine mapping with SusieR… [1] “objective:-2439.61831759579” [1] “objective:-2438.71996233951” [1] “objective:-2438.71907514181” [1] “objective:-2438.71907425949”
Credible Set:
****** 5 SNPs included in Credible Set ******
## Warning: Removed 1 rows containing missing values (geom_hline).
Used the Ad Mixed American (AMR) reference panel given the absence of a Hispanic reference panel for 1KG_Phase1.
dataset_name <- "MESA_HIS"
superpopulation <- "HIS"
allResults[[dataset_name]] <- finemap_eQTL(superpopulation, gene = "PTK2B",
fullSS_path = Data_dirs[[dataset_name]]$fullSS,
force_new_subset = force_new_subset)Subset file already exists. Importing Data/eQTL/MESA/PTK2B_AMR_subset.txt …
## Warning in charToRaw(enc2utf8(text)): argument should be a character vector of length 1
## all but the first element will be ignored
awk -F " " ‘NR==1{print $0}{ if(($7 == 8) && ($12 >= 26719987 && $12 <= 27719987)) { print } }’ Data/eQTL/MESA/PTK2B_AMR_subset.txt > Data/eQTL/MESA/PTK2B_MESA_subset.txt Extraction completed in 0.08 seconds Calculating Standard Error…
Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.
Fine mapping with SusieR… [1] “objective:-3444.59629242866” [1] “objective:-3444.46011107977” [1] “objective:-3444.45983730976” [1] “objective:-3444.4598367586”
Credible Set:
— ERROR — ****** Could NOT identify credible set. Default to SNPs with the top 5 PIPs ******
## Warning: Removed 1 rows containing missing values (geom_hline).
merged_results <- merge_finemapping_results(allResults, csv_path ="Results/merged_finemapping_results.csv")
createDT(merged_results, caption = "Finemapped Credible Sets for Multiple Datasets")## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
# table(as.character(merged_results$SNP))